The code is structured to simulate ad allocation strategies across multiple platforms using the Upper Confidence Bound (UCB) algorithm. The main objective of this implementation is to estimate click probabilities, cumulative revenue, and revenue per ad across different configurations of firms (K), sites (L), and dimensions (d).
Initialization of Vectors: The first step in the code involves initializing the click probabilities for each firm and the traffic distribution for each site. The click probabilities are generated randomly for each firm, while the site traffic is modeled using an exponential distribution. This simulates varying levels of user engagement across different sites, which is then normalized to ensure the traffic sums to one.
Click Probability Calculation: The code calculates the probability that a click will result from a specific firm’s ad on a specific site. This is done by computing the outer product of the initialized vectors, resulting in a matrix where each element represents the click probability for a given firm-site pair.
UCB Selection Strategy: The UCB algorithm is implemented to select the site that maximizes the upper confidence bound of the estimated click probability. This approach balances exploration (trying out different platforms) with exploitation (continuing to invest in platforms that yield high returns). The algorithm updates the estimated probabilities and click counts as it iterates through the time horizon.
Simulation Execution: The simulation is run across various configurations defined by different values of dimensions (d), number of sites (L), and number of firms (K). For each configuration, the simulation calculates the estimated click probabilities, the number of visits, clicks, and cumulative revenue over time. These results are stored for subsequent analysis and visualization.
Visualization of Results: The final part of the code generates visualizations to compare the estimated click probabilities against the true probabilities, track cumulative revenue over time, and assess revenue distribution per ad. These visualizations help in understanding the effectiveness of the UCB algorithm across different scenarios.
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
def initialize_vectors(K, L):
# Click probabilities for firms
c = np.random.rand(K)
# Site traffic (exponential distribution)
s = np.random.exponential(scale=1.0, size=L)
s = s / np.sum(s) # Normalize to get relative traffic
return c, s
def click_probability(c, s):
return np.outer(c, s)
def ucb_selection(K, L, c, s, horizon, alpha=1.0):
visits = np.zeros((K, L), dtype=int)
clicks = np.zeros((K, L), dtype=int)
estimated_prob = np.zeros((K, L))
for t in range(1, horizon + 1):
ucb_values = estimated_prob + alpha * np.sqrt(np.log(t) / (visits + 1))
i, j = np.unravel_index(np.argmax(ucb_values), ucb_values.shape)
click_prob = click_probability(c, s)[i, j]
click = np.random.rand() < click_prob
visits[i, j] += 1
clicks[i, j] += click
estimated_prob[i, j] = clicks[i, j] / visits[i, j]
return estimated_prob, visits, clicks
def run_simulation(d, L, horizon, K=100):
c, s = initialize_vectors(K, L)
estimated_prob, visits, clicks = ucb_selection(K, L, c, s, horizon)
return estimated_prob, visits, clicks, c, s
# Parameters
d_list = [1, 1, 1, 2, 2, 2, 10, 10, 10]
L_list = [1, 2, 10, 1, 2, 10, 2, 10, 20]
horizon = 10000
K = 100
results = {}
for d, L in zip(d_list, L_list):
estimated_prob, visits, clicks, c, s = run_simulation(d, L, horizon, K)
true_prob = click_probability(c, s)
results[(d, L)] = {
'estimated_prob': estimated_prob,
'visits': visits,
'clicks': clicks,
'true_prob': true_prob,
'cumulative_revenue': np.cumsum(clicks * np.random.rand(K, L))
}
# Visualization of Results
fig_scatter = make_subplots(rows=3, cols=3, subplot_titles=[f'd={d}, L={L}' for d, L in zip(d_list, L_list)])
fig_revenue = make_subplots(rows=3, cols=3, subplot_titles=[f'd={d}, L={L}' for d, L in zip(d_list, L_list)])
fig_bar = make_subplots(rows=3, cols=3, subplot_titles=[f'd={d}, L={L}' for d, L in zip(d_list, L_list)])
for i, ((d, L), result) in enumerate(results.items()):
row = i // 3 + 1
col = i % 3 + 1
# Scatter plot for estimated vs. true click probabilities
scatter_data = pd.DataFrame({
'True Probability': result['true_prob'].flatten(),
'Estimated Probability': result['estimated_prob'].flatten()
})
fig_scatter.add_trace(go.Scatter(x=scatter_data['True Probability'], y=scatter_data['Estimated Probability'], mode='markers', marker=dict(color='blue')), row=row, col=col)
fig_scatter.add_shape(type="line", line=dict(dash='dash', color='red'), x0=0, y0=0, x1=1, y1=1, row=row, col=col)
# Line plot for cumulative revenue over time
fig_revenue.add_trace(go.Scatter(y=result['cumulative_revenue'].flatten(), mode='lines', line=dict(color='green')), row=row, col=col)
# Bar plot for revenue per ad
ad_revenue = result['clicks'] * np.random.rand(K, L)
bar_data = pd.DataFrame({
'Ad': range(K * L),
'Revenue': ad_revenue.flatten()
})
fig_bar.add_trace(go.Bar(x=bar_data['Ad'], y=bar_data['Revenue'], marker=dict(color='purple')), row=row, col=col)
fig_scatter.update_layout(height=1200, width=1200, title_text="Estimated vs True Click Probabilities")
fig_revenue.update_layout(height=1200, width=1200, title_text="Cumulative Revenue over Time")
fig_bar.update_layout(height=1200, width=1200, title_text="Revenue per Ad")
fig_scatter.show()
fig_revenue.show()
fig_bar.show()
This code is designed to run simulations using the Upper Confidence Bound (UCB) algorithm to estimate click probabilities, cumulative revenue, and revenue per ad across multiple configurations of firms (K), sites (L), and dimensions (d). The key steps in the implementation include:
Vector Initialization: The code begins by initializing vectors that represent the click probabilities for each firm and the traffic distribution for each site. The click probabilities are randomly generated, while the site traffic is modeled using an exponential distribution, which is then normalized. This setup simulates the varying attractiveness or user engagement of different sites.
UCB Selection Strategy: The UCB algorithm is employed to determine which site a firm should select to place its ad. The UCB value is computed based on the estimated click probabilities and the number of times a site has been visited, with an exploration parameter influencing the balance between trying new sites and exploiting known profitable sites. This strategy helps the algorithm adaptively explore the best sites while maximizing returns.
Running Simulations: The simulation is performed across a range of scenarios defined by different values of dimensions (d) and sites (L). For each scenario, the algorithm runs through a predefined horizon (number of time steps), during which it updates the estimated probabilities, records the number of visits, counts the clicks, and tracks cumulative revenue. This allows the model to simulate different advertising environments and observe how the UCB algorithm performs under various conditions.
Result Visualization: After the simulations are complete, the results are visualized in three different ways:
def thompson_sampling(K, L, c, s, horizon, a_prior=1, b_prior=1):
visits = np.zeros((K, L), dtype=int)
clicks = np.zeros((K, L), dtype=int)
estimated_prob = np.zeros((K, L))
a = a_prior * np.ones((K, L))
b = b_prior * np.ones((K, L))
for t in range(1, horizon + 1):
sampled_theta = np.random.beta(a, b)
i, j = np.unravel_index(np.argmax(sampled_theta), sampled_theta.shape)
click_prob = click_probability(c, s)[i, j]
click = np.random.rand() < click_prob
visits[i, j] += 1
clicks[i, j] += click
estimated_prob[i, j] = clicks[i, j] / visits[i, j]
# Update beta distribution parameters
a[i, j] += click
b[i, j] += 1 - click
return estimated_prob, visits, clicks
def run_simulation(d, L, horizon, K=100):
c, s = initialize_vectors(K, L)
estimated_prob, visits, clicks = thompson_sampling(K, L, c, s, horizon)
return estimated_prob, visits, clicks, c, s
# Parameters
d_list = [1, 1, 1, 2, 2, 2, 10, 10, 10]
L_list = [1, 2, 10, 1, 2, 10, 2, 10, 20]
horizon = 10000
K = 100
results = {}
for d, L in zip(d_list, L_list):
estimated_prob, visits, clicks, c, s = run_simulation(d, L, horizon, K)
true_prob = click_probability(c, s)
results[(d, L)] = {
'estimated_prob': estimated_prob,
'visits': visits,
'clicks': clicks,
'true_prob': true_prob,
'cumulative_revenue': np.cumsum(clicks * np.random.rand(K, L))
}
# Visualization of Results
fig_scatter = make_subplots(rows=3, cols=3, subplot_titles=[f'd={d}, L={L}' for d, L in zip(d_list, L_list)])
fig_revenue = make_subplots(rows=3, cols=3, subplot_titles=[f'd={d}, L={L}' for d, L in zip(d_list, L_list)])
fig_bar = make_subplots(rows=3, cols=3, subplot_titles=[f'd={d}, L={L}' for d, L in zip(d_list, L_list)])
for i, ((d, L), result) in enumerate(results.items()):
row = i // 3 + 1
col = i % 3 + 1
# Scatter plot for estimated vs. true click probabilities
scatter_data = pd.DataFrame({
'True Probability': result['true_prob'].flatten(),
'Estimated Probability': result['estimated_prob'].flatten()
})
fig_scatter.add_trace(go.Scatter(x=scatter_data['True Probability'], y=scatter_data['Estimated Probability'], mode='markers', marker=dict(color='blue')), row=row, col=col)
fig_scatter.add_shape(type="line", line=dict(dash='dash', color='red'), x0=0, y0=0, x1=1, y1=1, row=row, col=col)
# Line plot for cumulative revenue over time
fig_revenue.add_trace(go.Scatter(y=result['cumulative_revenue'].flatten(), mode='lines', line=dict(color='green')), row=row, col=col)
# Bar plot for revenue per ad
ad_revenue = result['clicks'] * np.random.rand(K, L)
bar_data = pd.DataFrame({
'Ad': range(K * L),
'Revenue': ad_revenue.flatten()
})
fig_bar.add_trace(go.Bar(x=bar_data['Ad'], y=bar_data['Revenue'], marker=dict(color='purple')), row=row, col=col)
fig_scatter.update_layout(height=1200, width=1200, title_text="Estimated vs True Click Probabilities (Thompson Sampling)")
fig_revenue.update_layout(height=1200, width=1200, title_text="Cumulative Revenue over Time (Thompson Sampling)")
fig_bar.update_layout(height=1200, width=1200, title_text="Revenue per Ad (Thompson Sampling)")
fig_scatter.show()
fig_revenue.show()
fig_bar.show()
This code is designed to compare the performance of two algorithms—Thompson Sampling and Upper Confidence Bound (UCB)—in a simulated environment where firms allocate their ads across multiple platforms (sites). The goal is to estimate click probabilities, track cumulative revenue, and assess revenue distribution per ad under various scenarios.
Thompson Sampling:
UCB Algorithm:
Simulation Execution:
run_simulation function is a wrapper that allows for the execution of both Thompson Sampling and UCB algorithms across different configurations defined by the number of sites (L) and firms (K). The function initializes the click probabilities and site traffic, then applies the selected algorithm (either Thompson or UCB) over a specified time horizon.Result Visualization:
def thompson_sampling(K, L, c, s, horizon, a_prior=1, b_prior=1):
visits = np.zeros((K, L), dtype=int)
clicks = np.zeros((K, L), dtype=int)
estimated_prob = np.zeros((K, L))
a = a_prior * np.ones((K, L))
b = b_prior * np.ones((K, L))
for t in range(1, horizon + 1):
sampled_theta = np.random.beta(a, b)
i, j = np.unravel_index(np.argmax(sampled_theta), sampled_theta.shape)
click_prob = click_probability(c, s)[i, j]
click = np.random.rand() < click_prob
visits[i, j] += 1
clicks[i, j] += click
estimated_prob[i, j] = clicks[i, j] / visits[i, j]
# Update beta distribution parameters
a[i, j] += click
b[i, j] += 1 - click
return estimated_prob, visits, clicks
def ucb_selection(K, L, c, s, horizon, alpha=1.0):
visits = np.zeros((K, L), dtype=int)
clicks = np.zeros((K, L), dtype=int)
estimated_prob = np.zeros((K, L))
for t in range(1, horizon + 1):
ucb_values = estimated_prob + alpha * np.sqrt(np.log(t) / (visits + 1))
i, j = np.unravel_index(np.argmax(ucb_values), ucb_values.shape)
click_prob = click_probability(c, s)[i, j]
click = np.random.rand() < click_prob
visits[i, j] += 1
clicks[i, j] += click
estimated_prob[i, j] = clicks[i, j] / visits[i, j]
return estimated_prob, visits, clicks
def run_simulation(algorithm, d, L, horizon, K=100):
c, s = initialize_vectors(K, L)
if algorithm == 'Thompson':
estimated_prob, visits, clicks = thompson_sampling(K, L, c, s, horizon)
elif algorithm == 'UCB':
estimated_prob, visits, clicks = ucb_selection(K, L, c, s, horizon)
return estimated_prob, visits, clicks, c, s
# Parameters
d = 1
L_list = [1, 2, 10]
horizon = 10000
K = 100
algorithms = ['Thompson', 'UCB']
results = {}
for algorithm in algorithms:
for L in L_list:
estimated_prob, visits, clicks, c, s = run_simulation(algorithm, d, L, horizon, K)
true_prob = click_probability(c, s)
results[(algorithm, L)] = {
'estimated_prob': estimated_prob,
'visits': visits,
'clicks': clicks,
'true_prob': true_prob,
'cumulative_revenue': np.cumsum(clicks * np.random.rand(K, L))
}
# Visualization of Results
fig_scatter = make_subplots(rows=2, cols=3, subplot_titles=[f'{algo}, d={d}, L={L}' for algo in algorithms for L in L_list])
fig_revenue = make_subplots(rows=2, cols=3, subplot_titles=[f'{algo}, d={d}, L={L}' for algo in algorithms for L in L_list])
fig_bar = make_subplots(rows=2, cols=3, subplot_titles=[f'{algo}, d={d}, L={L}' for algo in algorithms for L in L_list])
for i, ((algorithm, L), result) in enumerate(results.items()):
row = i // 3 + 1
col = i % 3 + 1
# Scatter plot for estimated vs. true click probabilities
scatter_data = pd.DataFrame({
'True Probability': result['true_prob'].flatten(),
'Estimated Probability': result['estimated_prob'].flatten()
})
fig_scatter.add_trace(go.Scatter(x=scatter_data['True Probability'], y=scatter_data['Estimated Probability'], mode='markers', marker=dict(color='blue')), row=row, col=col)
fig_scatter.add_shape(type="line", line=dict(dash='dash', color='red'), x0=0, y0=0, x1=1, y1=1, row=row, col=col)
# Line plot for cumulative revenue over time
fig_revenue.add_trace(go.Scatter(y=result['cumulative_revenue'].flatten(), mode='lines', line=dict(color='green')), row=row, col=col)
# Bar plot for revenue per ad
ad_revenue = result['clicks'] * np.random.rand(K, L)
bar_data = pd.DataFrame({
'Ad': range(K * L),
'Revenue': ad_revenue.flatten()
})
fig_bar.add_trace(go.Bar(x=bar_data['Ad'], y=bar_data['Revenue'], marker=dict(color='purple')), row=row, col=col)
fig_scatter.update_layout(height=800, width=1200, title_text="Estimated vs True Click Probabilities (Thompson vs UCB)")
fig_revenue.update_layout(height=800, width=1200, title_text="Cumulative Revenue over Time (Thompson vs UCB)")
fig_bar.update_layout(height=800, width=1200, title_text="Revenue per Ad (Thompson vs UCB)")
fig_scatter.show()
fig_revenue.show()
fig_bar.show()
This code implements and simulates a novel Adaptive Exploration-Exploitation (AEE) strategy, combining elements from both the Upper Confidence Bound (UCB) and Thompson Sampling algorithms. The goal is to dynamically balance exploration and exploitation more effectively, optimizing ad allocation across multiple platforms (sites) over time.
Adaptive Exploration-Exploitation (AEE) Strategy:
Running Simulations:
run_simulation_adaptive function is used to execute the AEE strategy across various configurations defined by different numbers of sites (L). The function initializes the click probabilities and site traffic, and then applies the AEE strategy over a specified time horizon.Result Visualization:
def adaptive_exploration_exploitation(K, L, c, s, horizon, alpha=1.0, beta=0.5):
visits = np.zeros((K, L), dtype=int)
clicks = np.zeros((K, L), dtype=int)
estimated_prob = np.zeros((K, L))
a = np.ones((K, L))
b = np.ones((K, L))
for t in range(1, horizon + 1):
ucb_values = estimated_prob + alpha * np.sqrt(np.log(t) / (visits + 1))
sampled_theta = np.random.beta(a, b)
# Adaptive selection: Use Thompson Sampling for underexplored arms, UCB for others
explore_exploit_threshold = np.sqrt(np.log(t) / (visits + 1))
combined_scores = beta * sampled_theta + (1 - beta) * ucb_values
i, j = np.unravel_index(np.argmax(combined_scores), combined_scores.shape)
click_prob = click_probability(c, s)[i, j]
click = np.random.rand() < click_prob
visits[i, j] += 1
clicks[i, j] += click
estimated_prob[i, j] = clicks[i, j] / visits[i, j]
# Update the beta distribution parameters for Thompson Sampling
a[i, j] += click
b[i, j] += 1 - click
return estimated_prob, visits, clicks
def run_simulation_adaptive(d, L, horizon, K=100):
c, s = initialize_vectors(K, L)
estimated_prob, visits, clicks = adaptive_exploration_exploitation(K, L, c, s, horizon)
return estimated_prob, visits, clicks, c, s
# Parameters
d = 1
L_list = [1, 2, 10]
horizon = 10000
K = 100
results_adaptive = {}
for L in L_list:
estimated_prob, visits, clicks, c, s = run_simulation_adaptive(d, L, horizon, K)
true_prob = click_probability(c, s)
results_adaptive[(d, L)] = {
'estimated_prob': estimated_prob,
'visits': visits,
'clicks': clicks,
'true_prob': true_prob,
'cumulative_revenue': np.cumsum(clicks * np.random.rand(K, L))
}
# Visualization of Results
fig_scatter = make_subplots(rows=1, cols=3, subplot_titles=[f'd={d}, L={L}' for L in L_list])
fig_revenue = make_subplots(rows=1, cols=3, subplot_titles=[f'd={d}, L={L}' for L in L_list])
fig_bar = make_subplots(rows=1, cols=3, subplot_titles=[f'd={d}, L={L}' for L in L_list])
for i, (key, result) in enumerate(results_adaptive.items()):
L = key[1] # Extract the L value correctly
row = 1
col = i + 1
# Scatter plot for estimated vs. true click probabilities
scatter_data = pd.DataFrame({
'True Probability': result['true_prob'].flatten(),
'Estimated Probability': result['estimated_prob'].flatten()
})
fig_scatter.add_trace(go.Scatter(x=scatter_data['True Probability'], y=scatter_data['Estimated Probability'], mode='markers', marker=dict(color='blue')), row=row, col=col)
fig_scatter.add_shape(type="line", line=dict(dash='dash', color='red'), x0=0, y0=0, x1=1, y1=1, row=row, col=col)
# Line plot for cumulative revenue over time
fig_revenue.add_trace(go.Scatter(y=result['cumulative_revenue'].flatten(), mode='lines', line=dict(color='green')), row=row, col=col)
# Bar plot for revenue per ad
ad_revenue = result['clicks'] * np.random.rand(K, L) # Make sure L is an integer
bar_data = pd.DataFrame({
'Ad': range(K * L),
'Revenue': ad_revenue.flatten()
})
fig_bar.add_trace(go.Bar(x=bar_data['Ad'], y=bar_data['Revenue'], marker=dict(color='purple')), row=row, col=col)
fig_scatter.update_layout(height=400, width=1200, title_text="Estimated vs True Click Probabilities (Adaptive AEE)")
fig_revenue.update_layout(height=400, width=1200, title_text="Cumulative Revenue over Time (Adaptive AEE)")
fig_bar.update_layout(height=400, width=1200, title_text="Revenue per Ad (Adaptive AEE)")
fig_scatter.show()
fig_revenue.show()
fig_bar.show()